"""
*1. import the image with a red rectangle to get the object
*2. import other png images to use that as a background
*3. Crop the region of interest from the img with the target object
*4. Write a dataloader that pastes the object of interest on a random background image
*5. Add very many transformations to this dataloader - COULD ADD MANY MORE
*6. Write up a neural net class for the detection
*7. Train the neural net and see how the training goes
8. If things go smoothly check the model on other datasets with different inter-class objects.
E.g., different triangles
9. Check the effect of pretraining using available datasets
10. Read about one data sample transfer learning
"""
import os
import time
import numpy as np
import cv2
import matplotlib.pyplot as plt
import torch
import torchvision
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tensorboardX import SummaryWriter
from torchsummary import summary
backgrounds_dir = './dataset/backgrounds/'
target_dir = './dataset/target/'
test_dir = './dataset/test/'

# Load the target document; the object of interest is outlined in red.
img = cv2.imread(target_dir + 'target_shape_marked.png')
print('image_shape: ', img.shape)
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# plt the original image
plt.figure(figsize=(16, 16))
plt.subplot(121), plt.imshow(img)

# Find the red rectangle's corners. A pixel counts as "red" when R>100 and
# G,B<100. Vectorized with a NumPy mask instead of the original O(H*W)
# Python double loop: np.nonzero scans in row-major order, so the first hit
# is the rectangle's upper-left corner and the last hit its lower-right
# corner -- exactly what the original scan computed.
red_mask = (img[:, :, 0] > 100) & (img[:, :, 1] < 100) & (img[:, :, 2] < 100)
red_rows, red_cols = np.nonzero(red_mask)
# (col, row) ordering because OpenCV point APIs expect (x, y).
uleft_colrow_idx = (int(red_cols[0]), int(red_rows[0]))
lright_colrow_idx = (int(red_cols[-1]), int(red_rows[-1]))
print(uleft_colrow_idx)
print(lright_colrow_idx)

# Mark the detected corners with green circles so detection can be checked.
img_marked = cv2.circle(img.copy(), uleft_colrow_idx, 50, (0, 255, 0), 10)
img_marked = cv2.circle(img_marked, lright_colrow_idx, 50, (0, 255, 0), 10)
# plot the image with added circles to check if the corner detection is fine
plt.subplot(122), plt.imshow(img_marked)
plt.show()
# Verify every background page has the same height/width as the target image.
fnames = os.listdir(backgrounds_dir)
for fname in fnames:
    bg_img = cv2.imread(backgrounds_dir + fname, 0)
    assert img.shape[:2] == bg_img.shape[:2], 'Check dimensions'
print('we are cool!')
# cropping -- extract the object of interest (OOI) from inside the red box.
# Unpack the (col, row) corner tuples found by the red-pixel scan above.
col_ul, row_ul = uleft_colrow_idx
col_lr, row_lr = lright_colrow_idx
print(row_ul,row_lr) , print(col_ul,col_lr)
#object of interest
#red rectangle thickness
# eps shrinks the crop window so the red border itself is excluded
eps = 5
ooi = img[row_ul+eps:row_lr-eps,col_ul+eps:col_lr-eps,:].copy()
print(ooi.shape)
plt.imshow(ooi)
plt.show()
# crop size relative to the full document (used to judge object scale)
h,w,c = ooi.shape
H,W,C = img.shape
print('h/H:{:.2f}, w/W:{:.2f}'.format(h/H, w/W))
#convert to gray -- ooi_g is the grayscale crop used in the rest of the script
ooi_g = cv2.cvtColor(ooi, cv2.COLOR_RGB2GRAY)
plt.imshow(ooi_g, cmap='gray')
plt.show()
# Preview affine augmentations of the grayscale crop on a 3x3 grid.
import imgaug as ia
from imgaug import augmenters as iaa
ia.seed(0)
aug = iaa.Affine(
    scale={"x": (0.1, 1.5), "y": (0.1, 1.5)},
    rotate=(-90, +90),
    shear=(-10, 10),
    cval=(255),
    fit_output=True)
# Nine copies of the same crop; each gets an independently sampled transform.
img_batch = [ooi_g] * 9
augmented_batch = aug(images=img_batch)
fig = plt.figure(figsize=(16, 16))
for idx, aug_img in enumerate(augmented_batch):
    plt.subplot(3, 3, idx + 1)
    plt.imshow(aug_img, cmap='gray')
plt.show()
# Preview a sequential pipeline: random horizontal flip + affine transform,
# applied in random order, on the same 9-image batch.
import imgaug as ia
from imgaug import augmenters as iaa
seq_aug = iaa.Sequential([
    iaa.Fliplr(0.5), # horizontal flips
    # Apply affine transformations to each image.
    # Scale/zoom them, translate/move them, rotate them and shear them.
    iaa.Affine(
        scale={"x": (0.4, 1.1), "y": (0.4, 1.1)},
        translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
        rotate=(-25, 25),
        shear=(-8, 8),
        cval=(255)  # pad with white so borders blend into a white page
    )
], random_order=True) # apply augmenters in random order
# Augment the batch and tile results horizontally into one wide image.
augmented_batch = seq_aug(images = img_batch)
augmented_batch = np.hstack(augmented_batch)
print('Augmented_batch_shape: ',augmented_batch.shape)
ia.imshow(augmented_batch)
# Resize one background page and the target crop to the working resolution.
bg_img = cv2.imread(backgrounds_dir + fnames[0], 0)
t_img = ooi_g.copy()
print('bg_img_shape: ', bg_img.shape)
print('crop_img_shape: ', ooi_g.shape)
SIZE = 256
# background and target specs
bg_H, bg_W = bg_img.shape
h, w = t_img.shape
# Scale the target by the same factor as the background so the object keeps
# its size ratio relative to the page.
t_dim = (int(SIZE / bg_W * w), int(SIZE / bg_H * h))
# BUG FIX: cv2.resize's third positional argument is `dst`, not
# `interpolation` -- the original flags were silently not applied.
bg_img = cv2.resize(bg_img, (SIZE, SIZE), interpolation=cv2.INTER_AREA)
t_img = cv2.resize(t_img, t_dim, interpolation=cv2.INTER_LANCZOS4)
print('bg_img_shape: ', bg_img.shape)
print('crop_img_shape: ', t_img.shape)
plt.figure(figsize=(16, 16))
plt.subplot(121), plt.imshow(bg_img, cmap='gray')
plt.subplot(122), plt.imshow(t_img, cmap='gray')
plt.plot(70, 50, 'or')
plt.xlim([0, SIZE])
plt.ylim([0, SIZE])
plt.show()
#do transformations -- augmentation applied to the resized target before pasting
seq_aug = iaa.Sequential([
    iaa.Fliplr(0.5), # horizontal flips
    iaa.Affine(
        scale={"x": (0.9, 1.1), "y": (0.9, 1.1)},
        translate_percent={"x": (-0.05, 0.05), "y": (-0.05, 0.05)},
        rotate=(-90, 90),
        shear=(-16, 16),
        cval=(255),  # pad with white
        fit_output=False  # keep the output the same size as the input
    )
], random_order=True) # apply augmenters in random order
# Augment a single target image and compare it against the original.
augmented_t = seq_aug(image = t_img)
plt.figure(figsize=(16,6))
plt.subplot(121),plt.imshow(t_img, cmap = 'gray')
plt.subplot(122), plt.imshow(augmented_t, cmap = 'gray')
plt.show()
print(augmented_t.shape)
# Paste the augmented target at a random location on the background and
# build the (SIZE/STRIDE x SIZE/STRIDE) objectness label grid.
# get resized specs
H_pix, W_pix = bg_img.shape
h_pix, w_pix = augmented_t.shape
# sample upper-left col/row idxs so the paste stays fully inside the page
ulcol_idx = np.random.randint(0, (W_pix - w_pix))
ulrow_idx = np.random.randint(0, (H_pix - h_pix))
# center pixel of the pasted object
crow_idx = ulrow_idx + h_pix // 2
ccol_idx = ulcol_idx + w_pix // 2
STRIDE = 32
target = np.zeros((SIZE // STRIDE, SIZE // STRIDE), dtype=np.uint8)
# CONSISTENCY FIX: index with STRIDE rather than the hard-coded 32 so the
# label grid stays correct if STRIDE is ever changed.
target[crow_idx // STRIDE, ccol_idx // STRIDE] = 1
# add target on top of the background
fused_img = bg_img.copy()
fused_img[ulrow_idx:ulrow_idx + h_pix, ulcol_idx:ulcol_idx + w_pix] = augmented_t
plt.figure(figsize=(16, 16))
plt.subplot(121), plt.imshow(fused_img, cmap='gray')
plt.subplot(122), plt.imshow(target, cmap='gray')
plt.show()
# Paste N (1..N_max) augmented targets on the page without overlaps, marking
# each object's center cell in the label grid.
STRIDE = 32
N_max = 9
N = np.random.randint(1,N_max+1)
print('N:', N)
target = np.zeros((SIZE//STRIDE, SIZE//STRIDE), dtype = np.uint8)
# occupancy canvas: 1 where some object has already been pasted
bg_canvas = np.zeros_like(bg_img)
for i in range(N):
    #we can add resize here
    augmented_t = seq_aug(image = t_img)
    #get resized specs
    H_pix, W_pix = bg_img.shape
    h_pix, w_pix = augmented_t.shape
    if i == 0:
        # First object: any in-bounds position is free.
        #sample upper left col and row idxs
        ulcol_idx = np.random.randint(0,(W_pix-w_pix))
        ulrow_idx = np.random.randint(0,(H_pix-h_pix))
        #get the idxs for the center pixel
        crow_idx = ulrow_idx + h_pix//2
        ccol_idx = ulcol_idx + w_pix//2
        #add target on top the background
        fused_img = bg_img.copy()
        fused_img[ulrow_idx:ulrow_idx+h_pix,ulcol_idx:ulcol_idx+w_pix] = augmented_t
        #create the label matrix
        target[crow_idx//STRIDE,ccol_idx//STRIDE] = 1
        #add changes to the background canvas
        bg_canvas[ulrow_idx:ulrow_idx+h_pix,ulcol_idx:ulcol_idx+w_pix] = 1
    else:
        # Rejection-sample a free spot: dummy_canvas starts as all-ones so the
        # while loop always runs at least once (do-while idiom).
        # NOTE(review): can spin indefinitely on a crowded page; a retry cap
        # would be safer -- confirm N_max stays small enough in practice.
        dummy_canvas = np.ones_like(bg_canvas)
        while (dummy_canvas*bg_canvas).sum() > 0:
            #sample upper left col and row idxs
            ulcol_idx = np.random.randint(0,(W_pix-w_pix))
            ulrow_idx = np.random.randint(0,(H_pix-h_pix))
            #get the idxs for the center pixel
            crow_idx = ulrow_idx + h_pix//2
            ccol_idx = ulcol_idx + w_pix//2
            dummy_canvas = np.zeros_like(bg_canvas)
            dummy_canvas[ulrow_idx:ulrow_idx+h_pix,ulcol_idx:ulcol_idx+w_pix] = 1
        #add target on top the background
        fused_img[ulrow_idx:ulrow_idx+h_pix,ulcol_idx:ulcol_idx+w_pix] = augmented_t
        #create the label matrix
        target[crow_idx//STRIDE,ccol_idx//STRIDE] = 1
        #add changes to the background canvas
        bg_canvas[ulrow_idx:ulrow_idx+h_pix,ulcol_idx:ulcol_idx+w_pix] = 1
plt.figure(figsize=(16,16))
plt.subplot(121),plt.imshow(fused_img)
plt.subplot(122),plt.imshow(target)
plt.show()
import imgaug as ia
from imgaug import augmenters as iaa


class OneShot_Dataset(Dataset):
    """Synthetic detection dataset.

    Pastes 1..N_max augmented copies of a target shape (cropped out of a
    red-rectangle-marked image) onto background pages. Each sample is an
    (image, label) pair where the label is a (i_size//stride)^2 grid with 1
    in every cell containing a pasted object's center (YOLO-style objectness).
    """

    def __init__(self, background_dir='./dataset/backgrounds/',
                 target_dir='./dataset/target/',
                 target_fname='target_shape_marked.png'):
        self.background_dir = background_dir
        self.target_dir = target_dir
        self.background_fnames = os.listdir(self.background_dir)
        self.target_fname = target_fname
        self.idx_range = int(len(self.background_fnames))
        # Grayscale crop of the target shape, extracted once up front.
        self.target_image = self.crop_the_target_shape()
        self.augmentation_function = self.sequential_augmentation_function()

    def __len__(self):
        return int(self.idx_range)

    def __getitem__(self, index):
        # Read one background page (grayscale) and synthesize a sample on it.
        bg_path = os.path.join(self.background_dir, self.background_fnames[index])
        bg_img = cv2.imread(bg_path, 0)
        image_npa, label_npa = self.create_io_arrays(bg_img)
        input_tensor = torch.from_numpy(image_npa).float()
        output_tensor = torch.from_numpy(label_npa).float()
        return input_tensor, output_tensor

    def crop_the_target_shape(self):
        """Find the red rectangle in the marked target image and return the
        grayscale crop of the shape inside it."""
        # BUG FIX: use the configured directory and filename instead of the
        # hard-coded module-level `target_dir` + literal name.
        img = cv2.cvtColor(cv2.imread(os.path.join(self.target_dir, self.target_fname)),
                           cv2.COLOR_BGR2RGB)
        print('document_image_shape: ', img.shape)
        # Red pixels: R>100 and G,B<100. np.nonzero scans row-major, so the
        # first/last red pixels are the rectangle's upper-left/lower-right
        # corners (vectorized replacement of the original pixel double loop).
        red_mask = (img[:, :, 0] > 100) & (img[:, :, 1] < 100) & (img[:, :, 2] < 100)
        red_rows, red_cols = np.nonzero(red_mask)
        uleft_colrow_idx = (int(red_cols[0]), int(red_rows[0]))
        lright_colrow_idx = (int(red_cols[-1]), int(red_rows[-1]))
        img_marked = cv2.circle(img.copy(), uleft_colrow_idx, 50, (0, 255, 0), 10)
        img_marked = cv2.circle(img_marked, lright_colrow_idx, 50, (0, 255, 0), 10)
        # plot the original image and the corner-marked image for inspection
        fig = plt.figure(figsize=(16, 16))
        plt.subplot(121), plt.imshow(img)
        plt.subplot(122), plt.imshow(img_marked)
        plt.show()
        # BUG FIX: the original indexed with row_ul/col_ul/row_lr/col_lr,
        # which are not defined in this method -- it only worked when
        # same-named globals happened to exist in the calling script.
        # Derive them from the detected corners instead.
        col_ul, row_ul = uleft_colrow_idx
        col_lr, row_lr = lright_colrow_idx
        eps = 5  # red rectangle line thickness; excluded from the crop
        ooi = img[row_ul + eps:row_lr - eps, col_ul + eps:col_lr - eps, :].copy()
        # convert to gray
        ooi_g = cv2.cvtColor(ooi, cv2.COLOR_RGB2GRAY)
        h, w, c = ooi.shape
        H, W, C = img.shape
        print('object_of_interest_shape: ', ooi.shape)
        print('h/H:{:.2f}, w/W:{:.2f}'.format(h / H, w / W))
        return ooi_g

    def sequential_augmentation_function(self):
        """Build the per-sample augmentation pipeline (flip + affine)."""
        seq_aug = iaa.Sequential([
            iaa.Fliplr(0.5),  # horizontal flips
            # Scale/zoom, translate/move, rotate and shear each image.
            iaa.Affine(
                scale={"x": (0.4, 1.1), "y": (0.4, 1.1)},
                translate_percent={"x": (-0.1, 0.1), "y": (-0.1, 0.1)},
                rotate=(-25, 25),
                shear=(-8, 8),
                cval=(255)  # pad with white
            )
        ], random_order=True)  # apply augmenters in random order
        return seq_aug

    def create_io_arrays(self, bg_img, N_max=4, i_size=256, stride=32):
        """Paste 1..N_max augmented targets on `bg_img` without overlaps.

        Returns (fused_img, target_tensor): the composited page and the
        (i_size//stride, i_size//stride) uint8 label grid.
        """
        # background and target specs
        bg_H, bg_W = bg_img.shape
        h, w = self.target_image.shape
        # Scale the target by the same factor as the background.
        t_dim = (int(i_size / bg_W * w), int(i_size / bg_H * h))
        # BUG FIX: pass the interpolation flag by keyword -- the third
        # positional argument of cv2.resize is `dst`, not `interpolation`.
        bg_img = cv2.resize(bg_img, (i_size, i_size), interpolation=cv2.INTER_LANCZOS4)
        t_img = cv2.resize(self.target_image, t_dim, interpolation=cv2.INTER_LANCZOS4)
        # sample the number of objects to scatter on this page
        N = np.random.randint(1, N_max + 1)
        target_tensor = np.zeros((i_size // stride, i_size // stride), dtype=np.uint8)
        # occupancy canvas so objects are never pasted on top of each other
        bg_canvas = np.zeros_like(bg_img)
        fused_img = bg_img.copy()
        for i in range(N):
            augmented_t = self.augmentation_function(image=t_img)
            H_pix, W_pix = bg_img.shape
            h_pix, w_pix = augmented_t.shape
            if i == 0:
                # First object: any in-bounds position is free.
                ulcol_idx = np.random.randint(0, (W_pix - w_pix))
                ulrow_idx = np.random.randint(0, (H_pix - h_pix))
            else:
                # Rejection-sample a free spot (do-while via all-ones init).
                # NOTE(review): can spin for a while on crowded pages; with
                # N_max=4 this terminates quickly in practice, but a retry
                # cap would be safer.
                dummy_canvas = np.ones_like(bg_canvas)
                while (dummy_canvas * bg_canvas).sum() > 0:
                    ulcol_idx = np.random.randint(0, (W_pix - w_pix))
                    ulrow_idx = np.random.randint(0, (H_pix - h_pix))
                    dummy_canvas = np.zeros_like(bg_canvas)
                    dummy_canvas[ulrow_idx:ulrow_idx + h_pix, ulcol_idx:ulcol_idx + w_pix] = 1
            # center pixel of the pasted object
            crow_idx = ulrow_idx + h_pix // 2
            ccol_idx = ulcol_idx + w_pix // 2
            fused_img[ulrow_idx:ulrow_idx + h_pix, ulcol_idx:ulcol_idx + w_pix] = augmented_t
            # BUG FIX: the original indexed with the module-level STRIDE
            # constant instead of the `stride` parameter.
            target_tensor[crow_idx // stride, ccol_idx // stride] = 1
            bg_canvas[ulrow_idx:ulrow_idx + h_pix, ulcol_idx:ulcol_idx + w_pix] = 1
        return fused_img, target_tensor
# Smoke-test the dataset: build it and visualize one (input, label) pair.
triangle_dataset = OneShot_Dataset()
print('Dataset_length: ',len(triangle_dataset))
#test 1-2
input_tensor, output_tensor = triangle_dataset[0]
print(input_tensor.shape, output_tensor.shape)
input_npa = input_tensor.numpy()
output_npa = output_tensor.numpy()
plt.figure(figsize=(16,8))
plt.subplot(121), plt.imshow(input_npa)
plt.subplot(122), plt.imshow(output_npa)
plt.show()
# define the neural network class
class Mark_1(nn.Module):
    """Fully-convolutional objectness detector.

    Input:  (N, 1, 256, 256) grayscale images.
    Output: (N, 1, 8, 8) raw objectness scores (no final activation).
    """

    def __init__(self):
        super(Mark_1, self).__init__()

        def conv_block(in_ch, out_ch, kernel_size, padding):
            # Conv -> BatchNorm -> ReLU -> 2x2 max-pool (halves H and W).
            return nn.Sequential(
                nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size,
                          stride=1, padding=padding),
                nn.BatchNorm2d(out_ch),
                # nn.LeakyReLU(),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2)
            )

        # Five downsampling blocks: 256 -> 128 -> 64 -> 32 -> 16 -> 8.
        self.layer1 = conv_block(1, 32, 3, 1)
        self.layer2 = conv_block(32, 64, 3, 1)
        self.layer3 = conv_block(64, 128, 3, 1)
        self.layer4 = conv_block(128, 256, 3, 1)
        self.layer5 = conv_block(256, 512, 1, 0)
        # Head: 1x1 conv down to a single score channel.
        self.layer6 = nn.Sequential(
            nn.Conv2d(512, 1, kernel_size=1, stride=1, padding=0),
            # nn.Sigmoid()
            # nn.LeakyReLU()
        )

    def forward(self, image):
        out = image
        for block in (self.layer1, self.layer2, self.layer3,
                      self.layer4, self.layer5, self.layer6):
            out = block(out)
        return out  # (N, 1, 8, 8)
# --- Training setup: device, model, dataloader, loss and optimizer ---
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('device : ', device)
model = Mark_1().to(device)
print(model)
summary(model, input_size=(1,256,256))
# create dataloader
train_loader = torch.utils.data.DataLoader(dataset=triangle_dataset,
                                           batch_size=8,
                                           shuffle=True)
learning_rate = 1e-3
# NOTE(review): this batch_size is never used -- the DataLoader above is
# built with batch_size=8; confirm which value is intended.
batch_size = 32
num_epochs = 30
# down-weight for grid cells without an object (YOLO-style lambda_noobj)
lmda_noobj = 0.5
criterion_MSE = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(),lr=learning_rate)
loss_list=[]
epoch_iteration_no=0
#training loop
model.train()
for epoch in range(num_epochs):
    start_t = time.time()
    for i, (images,labels) in enumerate(train_loader):
        # images: (B,1,256,256); labels reshaped to the (B,1,8,8) label grid
        images = images.view(-1,1,256,256).float().to(device)
        labels = labels.view(-1,1,8,8).float().to(device)
        # print('images_shape: ',images.shape)
        # print('labels_shape: ',labels.shape)
        targets_flat = labels[:,0,:,:].flatten().to(device)
        if i == 0 and epoch ==0 :
            print('i==0,epoch ==0 ',images.shape,labels.shape)
        #forward pass
        outputs = model(images)
        # print('outputs_shape: ',outputs.shape)
        #disect the output
        outputs_flat = outputs[:,0,:,:].flatten()
        # mask grid cells that contain an object center vs. empty cells
        mask_obj = targets_flat>0
        mask_noobj = targets_flat==0
        # objectness loss: MSE on object cells plus down-weighted MSE on the
        # (far more numerous) empty cells so they do not dominate the loss
        obj_loss_obj = criterion_MSE(outputs_flat[mask_obj],targets_flat[mask_obj])
        obj_loss_noobj = criterion_MSE(outputs_flat[mask_noobj],targets_flat[mask_noobj])
        loss = obj_loss_obj + lmda_noobj*obj_loss_noobj
        loss_list.append(loss.item())
        #backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    end_t = time.time()
    print('Epoch [{}/{}], loss: {:.12f}, time:{}'.format(epoch+1,num_epochs,loss.item(),end_t-start_t))
    print()
    # if (epoch+1)%save_ckpt_stepn == 0:
    # checkp_name = out_dir+'{}_epoch{}'.format(detector_name,(epoch+1)+(epoch_iteration_no)*(num_epochs))
    # print('saving model ckpt... \n',checkp_name)
    # torch.save(model.state_dict(),checkp_name)
epoch_iteration_no +=1
plt.plot(loss_list[:])
# Test the model with a freshly generated synthetic dataset and collect
# inputs / labels / raw outputs as NumPy arrays for plotting.
dataset = OneShot_Dataset()
data_loader = torch.utils.data.DataLoader(dataset=dataset,
                                          batch_size=1,
                                          shuffle=False)
images_npa = np.zeros((len(data_loader), 256, 256))
labels_npa = np.zeros((len(data_loader), 8, 8))
outputs_npa = np.zeros((len(data_loader), 8, 8))
model.eval()  # eval mode (batchnorm uses moving mean/variance instead of mini-batch mean/variance)
with torch.no_grad():
    for idx, (images, labels) in enumerate(data_loader):
        images = images.view(-1, 1, 256, 256).float().to(device)
        targets = labels.view(-1, 1, 8, 8).float().to(device)
        out = model(images)
        # BUG FIX: move tensors back to the CPU before .numpy() -- the
        # original raised whenever the model ran on CUDA.
        images_npa[idx] = images.squeeze().detach().cpu().numpy()
        labels_npa[idx] = targets.squeeze().detach().cpu().numpy()
        outputs_npa[idx] = out.squeeze().detach().cpu().numpy()
print('images_npa: ', images_npa.shape)
print('labels_npa: ', labels_npa.shape)
print('outputs_npa: ', outputs_npa.shape)
# Visualize one example: input page, ground-truth grid, predicted grid.
idx = 4
row = 1
col = 3
plt.figure(figsize=(16, 8))
plt.subplot(row, col, 1), plt.imshow(images_npa[idx])
plt.subplot(row, col, 2), plt.imshow(labels_npa[idx])
plt.subplot(row, col, 3), plt.imshow(outputs_npa[idx])
plt.plot()
# how about testset? Run the detector on the real (held-out) documents.
fnames = os.listdir(test_dir)
ex_img = cv2.imread(test_dir + fnames[0], 0)
H, W = ex_img.shape
print('H,W: ', ex_img.shape)
print('Number of test examples: ', len(fnames))
# create empty arrays
test_images = np.zeros((len(fnames), H, W))
test_images_resized = np.zeros((len(fnames), 256, 256))
# read and resize all the test files
for idx, fname in enumerate(fnames):
    img = cv2.imread(test_dir + fname, 0)
    # BUG FIX: pass the interpolation flag by keyword -- the third
    # positional argument of cv2.resize is `dst`, not `interpolation`.
    img_resized = cv2.resize(img, (256, 256), interpolation=cv2.INTER_LANCZOS4)
    test_images[idx] = img
    test_images_resized[idx] = img_resized
# create the torch tensor to feed
test_tensor = torch.from_numpy(test_images_resized)
print(test_tensor.shape)
# run the model on the resized test document images
model.eval()
with torch.no_grad():
    # BUG FIX: move the batch to the model's device and the result back to
    # the CPU -- the original failed when the model was on CUDA.
    test_outputs = model(test_tensor.view(-1, 1, 256, 256).float().to(device)).squeeze()
    test_outputs = test_outputs.cpu().numpy()
# Threshold the raw objectness scores
test_outputs[test_outputs < 0.3] = 0
# ROBUSTNESS: size the plot grid from the actual file count instead of a
# hard-coded 14 rows.
row = len(fnames)
col = 2
fig = plt.figure(figsize=(16, 128))
for i in range(len(fnames)):
    plt.subplot(row, col, 2 * i + 1), plt.imshow(test_images_resized[i])
    plt.subplot(row, col, 2 * i + 2), plt.imshow(test_outputs[i])
plt.show()
# Upsample each 8x8 output map back to the original document resolution.
test_outputs_resized = np.zeros_like(test_images)
# ROBUSTNESS: iterate over the actual number of test images instead of a
# hard-coded 11; also pass the interpolation flag by keyword (the third
# positional argument of cv2.resize is `dst`, not `interpolation`).
for i in range(len(test_images)):
    test_outputs_resized[i] = cv2.resize(test_outputs[i], (W, H), interpolation=cv2.INTER_AREA)
plt.imshow(test_outputs_resized[1])
test_outputs_resized.max()
# Blend each upsampled heatmap over its document image and plot a selection.
final_heatmaps = np.zeros_like(test_images)
# ROBUSTNESS: iterate over the actual number of images instead of a
# hard-coded 11.
for i in range(len(test_images)):
    final_heatmaps[i] = cv2.addWeighted(test_images[i], 0.1,
                                        test_outputs_resized[i] * 255, 0.9, 0)
# Hand-picked examples for the final figure (two columns: page, heatmap).
selected_idxs = [0, 1, 2, 3, 5, 7, 8, 9, 10]
row = len(selected_idxs)
col = 2
fig = plt.figure(figsize=(16, 80))
subplot_counter = 1
for i in selected_idxs:
    fig.add_subplot(row, col, subplot_counter)
    plt.imshow(test_images[i], cmap='gray')
    subplot_counter += 1
    fig.add_subplot(row, col, subplot_counter)
    plt.imshow(final_heatmaps[i], cmap='gray')
    subplot_counter += 1
plt.show()